# -*- coding: utf-8 -*-
"""
spark  filter方法
"""

from pyspark import SparkConf, SparkContext
import os

# Point PySpark at the Python interpreter inside the project's virtual environment
os.environ['PYSPARK_PYTHON'] = "F:/projects/PycharmProjects/myprojects/pythonProject/venv/Scripts/python.exe"

# Run Spark locally, using as many worker threads as there are CPU cores
conf = SparkConf().setMaster("local[*]").setAppName("test_spark_app")

sc = SparkContext(conf=conf)

rdd = sc.parallelize([1, 2, 3, 4, 5])

# filter  keep only the elements for which the given function returns True (here, the even numbers)
rdd1 = rdd.filter(lambda a: a % 2 == 0)

print(rdd1.collect())
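# Prints [2, 4]: the odd numbers are filtered out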

# distinct  remove duplicate elements
print(sc.parallelize([1, 1, 2, 2, 3, 3, 4, 4]).distinct().collect())
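# Prints each of 1, 2, 3, 4 exactly once; the order may vary because distinct shuffles data across partitions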

# sortBy  sort elements by the key returned by the given function
print(sc.parallelize([('a', 2, '你是猪'), ('c', 4, '你是狗'), ('b', 60, '你是驴'), ('g', 21, '你是马')])
      .sortBy(lambda a: a[2], ascending=True, numPartitions=1).collect())
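# numPartitions=1 gathers the sorted result into a single partition; the tuples are ordered by the
# Unicode code points of their third element, which may not match an intuitive (pinyin) ordering.

# A minimal sketch of the same sortBy applied to the numeric second field instead, in descending order
# (it simply re-creates the tuples above for illustration):
print(sc.parallelize([('a', 2, '你是猪'), ('c', 4, '你是狗'), ('b', 60, '你是驴'), ('g', 21, '你是马')])
      .sortBy(lambda a: a[1], ascending=False, numPartitions=1).collect())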
sc.stop()
